Weather data

Downscaling and extracting weather variables

Author

Kaique Alves

Published

Invalid Date

Packages

library(tidyverse)
library(cowplot)
library(patchwork)
library(raster)
library(spatstat)
library(KrigR)
library(lubridate)
library(sf)
library(sp)
library(viridis)
library(ggthemes)
library(terra)
library(MetBrewer)

library(ncdf4)

White mold data

About the Data

If you have not downloaded the data necessary to run these analysis, please refer to getting started section before runnig the code below

wm_data = read.csv("data_white-mold/WhiteMoldSurveyWrangledData.csv")

Removing missing coordinates

wm_data2 = wm_data %>% 
  filter(!is.na(latitude)) 

Names of the counties in the dataset

counties = unique(wm_data2$county)
counties
[1] "Genesee"    "Niagara"    "Orleans"    "Livingston" "Wyoming"   
[6] "Ontario"    "Yates"      "Chautauqua" "Monroe"    
central_ny = c("wyoming", "livingston","ontario","yates")

Ploting the location of each field

#covert the names to lowercase
map_snap_fun = function(yearr){
counties_lc = tolower(counties)

ny_map_data = map_data('county', region = 'new york')

snap_map = ny_map_data %>% 
  filter(subregion %in% counties_lc) %>%
  mutate(region2 = case_when(subregion %in% central_ny ~ "Central lakes",
                            !subregion %in% central_ny ~ "Great lakes")) %>% 
  ggplot()+
  geom_polygon(data = ny_map_data,
               aes(x=long, y = lat, group = group),
               fill= "white",
               color = "black",
               size =0.3)+
  geom_polygon(aes(x=long, y = lat, group = group,
                   # fill=region2
                   ),
               fill= "#fafced",
               color = "black",
               size =0.3)+
  geom_point(data = wm_data2 %>% 
               group_by(subject) %>% 
               filter(dap == max(dap),
                       !is.na(wm)) %>%
               ungroup() %>% 
               filter(year == yearr) %>% 
               arrange(wm),
                size = 1,
             aes(longitude,latitude, color = wm>0))+
  coord_map(xlim = c(-80,-76.7),
            ylim = c(41.8, 43.5))+
  # scale_fill_manual(values = c("gray85", "gray70"))+
  # scale_color_colorblind(labels = c("Absent", "Present"))+
  scale_color_manual(labels = c("Absent", "Present"),
                     values = c("#009E73", "#E69F00"))+
  guides(color = guide_legend(override.aes = list(size=3)),
         fill = "none")+
  theme_minimal()+
  labs(x = "Longitude",
       y = "Latitude",
       fill = "Climate division",
       color = "White mold",
       title = paste(yearr))+
  # facet_wrap(~year, ncol = 1)+
  theme(legend.position = "right",
        legend.text = element_text(size=7),
        axis.title = element_text(size=7),
        axis.text = element_text(size=7),
        plot.title = element_text(size=10))
snap_map
}
ny_map_data = map_data('county', region = 'new york')
counties_lc = tolower(counties)
ny_map = ny_map_data %>% 
  filter(subregion %in% counties_lc) %>% 
  mutate(region2 = case_when(subregion %in% central_ny ~ "Central lakes",
                            !subregion %in% central_ny ~ "Great lakes")) %>% 
  ggplot()+
  geom_polygon(data = ny_map_data,
               aes(x=long, y = lat, group = group),
               fill= "white",
               color = "black",
               size =0.3
               )+
  geom_polygon(aes(x=long, y = lat, group = group,
                   # fill=region2
                   ),
               fill= "#fafced",
               color = "black",
               size =0.3)+
  annotate("rect", xmin = -80, xmax = -76.5,
                   ymin = 41.8, ymax = 43.5,
           color = "black",
           size = 0.3,
           alpha = 0)+
  # scale_size_manual(values = c(0.1,0.3))+
  # coord_map(xlim = c(-80,-73),
            # ylim = c(40, 45))+
  coord_map()+
   # scale_fill_manual(values = c("gray85", "gray70"))+
  theme_void()+
  labs(x = "Longitude",
       y = "Latitude",
       fill = "Climate division")+
  theme(legend.position = c(0.1,0.8))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
ny_map

  (map_snap_fun(yearr = "2006") +
   map_snap_fun(yearr = "2007")+
   map_snap_fun(yearr = "2008")+plot_layout(ncol = 2, guides = 'collect')&
     theme(legend.position = "bottom",
           legend.justification = c("left","center"))) +
  (ny_map+plot_layout(guides = 'keep')) +

  # plot_layout(ncol = 2)+
  plot_annotation(tag_levels = "A")&
  theme(legend.text = element_text(size=6),
        legend.title = element_text(size=8),
        legend.key.size = unit(0.4, 'cm'),
        plot.title = element_text(face = "bold"),
        plot.tag = element_text(face = "bold"))

ggsave("figs/maps/maps_fields.png", dpi= 900, height = 6, width = 7, bg = "white")
ggsave("figs/maps/maps_fields.pdf", dpi= 900, height = 6, width = 7, bg = "white")

ERA5

  • ERA5-Land hourly data from 1950 to present Description

    • 0.1° x 0.1°; Native resolution is 9 km

    • Global

    • Hourly

    • January 1950 to present

  • ERA5 hourly data on pressure levels from 1979 to present Description

    • Reanalysis: 0.25° x 0.25°

    • Global

    • Hourly

    • 1979 to present

  • Agrometeorological indicators from 1979 to present derived from reanalysis Description

    • 0.1° x 0.1°

    • Global

    • Daily

    • 1979 to present

  • There is a package for downloading the data using R: link

ERA5-Land hourly data from 1950 to present

Besides there is a package for downloading data from Era5, I chose using their website to download the data. The process, using their website or the R pakage require making a request and waiting a long time to get the data.

These are the codes for each variable inside the NETCDF files.

  • u10, d2m, t2m, lai_hv, lai_lv, src, skt, stl2, sp, tp, swvl1, swvl2, swvl3, swvl4

For now, we are going to use only these variables:

  • t2m: 2m temperature
  • d2m: 2m dew point temperature
  • sp: Surface pressure
  • swvl1: Soil moisture
  • stl1: Soil temperature

Relative humidity can be calculated from t2m, d2m, and sp.

Importing data

Here I load the data for each data variable and gather into a single raster object.

2m temperature

t2m_all = raster::stack("data_era5/era5_NY_2006-2008.nc", varname = "t2m")

2m dewpoint temperature

d2m_all = raster::stack("data_era5/era5_NY_2006-2008.nc", varname = "d2m")

Surface pressure

sp_all = raster::stack("data_era5/era5_NY_2006-2008.nc", varname = "sp")

soil moisture

swvl1_all = raster::stack("data_era5/era5_NY_2006-2008.nc", varname = "swvl1")

soil temperature

stl1_all = raster::stack("data_era5/era5_NY_2006-2008_2.nc", varname = "stl1")

Kringing Test

Weather data

Here I load the temperature data, select one layer (time and day) and plot it together with the New York map

r <- raster::stack("data_era5/era5_NY_2006-2008.nc", varname = "t2m")
# a = r$X2008.05.01.00.00.00
a = subset(r,1)
raster::plot(a)
points(wm_data2$longitude, wm_data2$latitude)
maps::map('county', region = 'new york', col = "#5E610B", add = TRUE)

length(names(r))
[1] 15408

NY shapefile

We will need the New York shapefile to download the digital elevation map, which should be used for kriging. I also filter only the counties that have been surveyed for white mold.

Dir.StateShp <- file.path("data_era5/test")# file.path("shape_files/nc_files")

 # ny_shape1 = readOGR("shape_files/cugir-007865/cugir-007865/cty036.shp")
 ny_shape1 = sf::read_sf("shape_files/cugir-007865/cugir-007865/cty036.shp")

 ny_shape = ny_shape1[ny_shape1$NAME %in% c(unique(wm_data2$county)
                                            # "Yates", "Steuben","Allegany", "Cattaraugus","Erie"
                                            ),] 


ggplot()+
  geom_sf(data =ny_shape )+
  geom_point(aes(wm_data2$longitude, wm_data2$latitude))+
  theme_nothing()

Masking

In this section, we create a mask to filter the dataset, retaining only data for the selected counties. The process involves the following steps:

  1. Convert the selected county shapes into a simple feature object for visualization.
ny_shape_st = st_as_sfc(ny_shape[1])
plot(ny_shape_st)

  1. Merge the geometries to create a single boundary.
ny_shape_st_nobound = st_union(ny_shape_st)
plot(ny_shape_st_nobound)

  1. Apply a buffer around the selected area to ensure a smooth mask transition.
ny_shape_buffer = st_sf(st_buffer(ny_shape_st_nobound,dist = 10000, max_cells = 500))
plot(ny_shape_buffer)

  1. Use the buffer to mask the raster, setting all selected cells to 1.
a11 =  terra::mask(a, ny_shape_buffer, updatevalue=NA, touches =T)
a11_values = values(a11)
a11_values[!is.na(values(a11))] = 1
values(a11) = a11_values

plot(a11)

  1. Convert the masked raster into a polygon. This will serve as the mask for filtering all datasets consistently.
a21 = raster::rasterToPolygons(a11, dissolve=T)
  1. Apply the mask to filter the raster dataset, ensuring only the relevant regions remain.
a2 = crop(mask(a,a21, touches =T),a21)
  1. Visualize the results:
    • The first plot shows the initial masked raster.
    • The second plot shows the mask polygon.
    • The third plot displays the filtered raster dataset.
par(mfrow=c(1,3))
plot(a11, main = "NULL raster file")
plot(a21, main = "Mask")
plot(a2, main = "Masked raster data")

ori_data_df = as.data.frame(a2, xy = T) 
colnames(ori_data_df)[3] <- "values"
ori_g= ori_data_df %>% 
  filter(!is.na(values)) %>% 
  ggplot()+
  geom_tile(aes(x, y, fill = values))+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= NA,
               color = "black")+
  scale_fill_viridis(option="B")+
  coord_sf(xlim = c(-80,-76.8), ylim = c(41.8,43.5))+
  geom_point(data = wm_data2,
             shape = 21,
             color = "white",
             fill = "black", 
             aes(longitude,latitude))+
  labs(x = "Longitude",
       y = "Latitude",
       title= "Era5 original")+
  theme_minimal()
ori_g

Downloading Digital elevation model (DEM)

Function to plot the DEM

source("functions/Plot_Covs.R")

Downloading DEM

# Covs_ls <- download_DEM(
#   Train_ras = a2,
#   Target_res = .02,
#   Shape = ny_shape,
#   Dir = Dir.StateShp,
#   Keep_Temporary = TRUE
# )

Covs_ls <- CovariateSetup(
  Training =  as(a2, "SpatRaster"),
  Target = .02,
  Extent = a21,
  Dir = "data_era5",
  Keep_Global = TRUE
)
###### Downloading global covariate data
[1] "GMTED2010 covariate data already downloaded."
###### Resampling Data
# nc <- nc_open("data_era5/GMTED2010.nc")
# names(nc[['var']])
# Covs_ls = raster::stack(x = "data_era5/GMTED2010.nc", varname = "GMTED2010")
# names(Covs_ls)
# Plot_Covs(Covs_ls)
Plot.Covariates(Covs_ls)
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

# class(a2)
Visualization

…Using ggplot2

as.data.frame(Covs_ls[[2]], xy = T) %>%
  mutate(DEM = GMTED2010) |> 
  filter(!is.na(DEM)) %>% 
  ggplot() +
  geom_tile(aes(x,y,fill = DEM))+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= NA,
               color = "black")+
   geom_point(data = wm_data2, size = 0.1,
             aes(longitude,latitude))+
  scale_fill_gradientn(colors=met.brewer("Homer2", direction = -1))+
  coord_sf(xlim = c(-80,-76.8), ylim = c(41.8,43.5))+
  labs(x = "Longitude",
       y = "Latitude",
       title= "Era5 original")+
  theme_void()+
  theme(legend.position = "none")

ggsave("figs/maps/elevation.png", dpi = 600, height = 5, width = 7, bg = "white")

Kringing

This function performs kriging of the weather variables as function of the DEM data

a2_SpatRaster = as(a2, "SpatRaster")
KrigStart <- Sys.time() 

State_Krig <- Kriging(
  Data =a2_SpatRaster, # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 1, # we want to krig using three cores to speed this process up
  FileName = "State_Shape", # save the finished file as this _t2m_2008.nc
  Dir = "data_era5/kriging" # where to save the output
)
###### Checking your Kriging Specification
[1] "A file with the name State_Shape_Kriged.nc already exists in data_era5/kriging."
[1] "Loading this file for you from the disk."
[1] "A file with the name State_Shape_StDev.nc already exists in data_era5/kriging."
[1] "Loading this file for you from the disk."
KrigStop <- Sys.time() 
KrigStop-KrigStart
Time difference of 0.116817 secs
Plot.Kriged(State_Krig)

# plot(Covs_ls[[1]])

Visualization

Krigs = State_Krig$Prediction
# Krigs = State_Krig$Kriging_SE
Krig_df <- as.data.frame(Krigs[[1]], xy = TRUE)
colnames(Krig_df)[3] <- "values"



krig_g = Krig_df %>% 
  filter(!is.na(values)) %>% 
  ggplot()+
  geom_tile(aes(x, y, fill = values))+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= NA,
               color = "black")+
  scale_fill_viridis(option="B")+
  # scale_color_viridis(option="B")+
  geom_point(data = wm_data2,
             shape = 21,
             color = "white",
             fill = "black",
             aes(longitude,latitude))+
  coord_sf(xlim = c(-80,-76.8), ylim = c(41.8,43.5))+
  labs(x = "Longitude",
       y = "Latitude",
       title= "Era5 kriged")

ori_g + krig_g +
  plot_layout(ncol = 1) &
theme_minimal()

Daily summaries

par(mfrow=c(1,3))
plot(mean(t2m_all[[1:24]]))
maps::map('county', region = 'new york', col = "#5E610B", add = TRUE)
plot(min(t2m_all[[1:24]]))
maps::map('county', region = 'new york', col = "#5E610B", add = TRUE)
plot(max(t2m_all[[1:24]]))
maps::map('county', region = 'new york', col = "#5E610B", add = TRUE)

# days_in_month(month)
year = 2006:2008
month = 4:10
day = 01:31
hour = as.numeric(seq(0,23,1))
data_era5 = expand_grid(year, month, day) %>% 
  dplyr::filter(month != 4 | day != 31,
         month != 6 | day != 31,
         month != 9 | day != 31)%>% 
  unite(date, year, month, day,sep="-",remove = F ) %>% 
  mutate(date = as.Date(date))

Function for calculating the summaries

mean_raster = function(i, stack_obj){
  mean(stack_obj[[i:(i+23)]])
}
min_raster = function(i, stack_obj){
  min(stack_obj[[i:(i+23)]])
}
max_raster = function(i, stack_obj){
  max(stack_obj[[i:(i+23)]])
}

Lapply

days_i = seq(1,length(data_era5$day)*24,by = 24)
time_start = Sys.time()

# dew point
aa1 = lapply(days_i, mean_raster, stack_obj= d2m_all)
d2m_mean_daily_stack = crop(mask(stack(aa1), a21),a21)
writeCDF(rast(d2m_mean_daily_stack),
         "data_era5/daily_summaries/d2m_mean_daily.nc",
         overwrite=TRUE)
# temperature
### mean
aa2 = lapply(days_i, mean_raster, stack_obj= t2m_all)
t2m_mean_daily_stack = crop(mask(stack(aa2), a21),a21)
writeCDF(rast(t2m_mean_daily_stack),
         "data_era5/daily_summaries/t2m_mean_daily.nc",
         overwrite=TRUE)
### minimum
aa3 = lapply(days_i, min_raster, stack_obj= t2m_all)
t2m_min_daily_stack = crop(mask(stack(aa3), a21),a21)
writeCDF(rast(t2m_min_daily_stack),
         "data_era5/daily_summaries/t2m_min_daily.nc",
         overwrite=TRUE)
### maximum
aa4 = lapply(days_i, max_raster, stack_obj= t2m_all)
t2m_max_daily_stack = crop(mask(stack(aa4), a21),a21)
writeCDF(rast(t2m_max_daily_stack),
         "data_era5/daily_summaries/t2m_max_daily.nc",
         overwrite=TRUE)


# Surface pressure
aa5 = lapply(days_i, mean_raster, stack_obj= sp_all)
sp_mean_daily_stack = crop(mask(stack(aa5), a21),a21)
writeCDF(rast(sp_mean_daily_stack),
         "data_era5/daily_summaries/sp_mean_daily.nc",
         overwrite=TRUE)

# Soil moisture
aa6 = lapply(days_i, mean_raster, stack_obj= swvl1_all)
swvl1_mean_daily_stack = crop(mask(stack(aa6), a21),a21)
writeCDF(rast(swvl1_mean_daily_stack),
         "data_era5/daily_summaries/swvl1_mean_daily.nc",
         overwrite=TRUE)

# soil temperature
aa7 = lapply(days_i, mean_raster, stack_obj= stl1_all)
stl1_mean_daily_stack = crop(mask(stack(aa7), a21),a21)
writeCDF(rast(stl1_mean_daily_stack),
         "data_era5/daily_summaries/stl1_mean_daily.nc",
         overwrite=TRUE)

time_end = Sys.time()
time_end -time_start
plot(d2m_mean_daily_stack$X1)
# plot(stl1_mean_daily_stack$X1)
maps::map('county', region = 'new york',  add = TRUE)

Download DEM

Kriging the daily summaries

Dir.krigd_var <- file.path("data_era5/kriged")

Dew point

d2m_mean_daily_Krig = Kriging(
  Data = as(d2m_mean_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "d2m_mean_daily_krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)

Mean Temeperature

t2m_mean_daily_Krig = Kriging(
  Data = as(t2m_mean_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "t2m_mean_daily_Krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)

Minimum temperature

t2m_min_daily_Krig = Kriging(
  Data = as(t2m_min_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "t2m_min_daily_Krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)

Maximum temperature

t2m_max_daily_Krig = Kriging(
  Data = as(t2m_max_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "t2m_max_daily_Krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)

Surface pressure

sp_mean_daily_Krig = Kriging(
  Data = as(sp_mean_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "sp_mean_daily_Krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)

Soil temperature

stl1_mean_daily_Krig = Kriging(
  Data = as(stl1_mean_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "stl1_mean_daily_Krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)

Soil moisture

swvl1_mean_daily_Krig = Kriging(
  Data = as(swvl1_mean_daily_stack, "SpatRaster"), # what to krig
  Covariates_training  = Covs_ls[[1]], # covariates at training resolution
  Covariates_target  = Covs_ls[[2]], # covariates at target resolution
  Equation = "GMTED2010", # the covariate(s) we want to use
  Keep_Temporary = FALSE, # delete temporary krigs of layers
  nmax = 40, # degree of localisation
  Cores = 5, # we want to krig using three cores to speed this process up
  FileName = "swvl1_mean_daily_Krig.nc", # save the finished file as this _t2m_2008.nc
  Dir = Dir.krigd_var#"data_era5/kriging" # where to save the output
)
d2m_mean_daily_Krig = stack("data_era5/kriged/d2m_mean_daily_Krig.nc")
t2m_mean_daily_Krig = stack("data_era5/kriged/t2m_mean_daily_Krig.nc")
t2m_min_daily_Krig = stack("data_era5/kriged/t2m_min_daily_Krig.nc")
t2m_max_daily_Krig = stack("data_era5/kriged/t2m_max_daily_Krig.nc")
sp_mean_daily_Krig = stack("data_era5/kriged/sp_mean_daily_Krig.nc")
swvl1_mean_daily_Krig = stack("data_era5/kriged/swvl1_mean_daily_Krig.nc")
stl1_mean_daily_Krig = stack("data_era5/kriged/stl1_mean_daily_Krig.nc")

Ploting the kingued maps (first layer)

par(mfrow=c(2,4))
plot(t2m_mean_daily_Krig$X1, main = "Mean Temp")
plot(t2m_min_daily_Krig$X1, main = "Min Temp")
plot(t2m_max_daily_Krig$X1, main = "Max Temp")
plot(d2m_mean_daily_Krig$X1, main = "Dew Point")
plot(sp_mean_daily_Krig$X1, main = "Surface Pressure")
plot(swvl1_mean_daily_Krig$X1, main = "Soil Moisture")
plot(stl1_mean_daily_Krig$X1, main = "Soil Temp")

Using ggplot

# t2m_max_daily_stack
Krigs = t2m_max_daily_stack$X1
Krig_df <- as.data.frame(Krigs[[1]], xy = TRUE)
colnames(Krig_df)[3] <- "values"

max_t_ori = Krig_df %>% 
  filter(!is.na(values)) %>% 
  ggplot()+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= "white",
               color = "black")+
  geom_tile(aes(x, y, fill = values-273.15))+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= NA,
               color = "black")+
  scale_fill_viridis(option="B", limits = c(5,18),breaks =seq(5, 18, by = 3))+
  # scale_color_viridis(option="B")+
  geom_point(data = wm_data2,
             shape = 21,
             color = "white",
             fill = "black",
             aes(longitude,latitude))+
  coord_sf(xlim = c(-80,-76.8), ylim = c(41.8,43.5))+
  labs(x = "Longitude",
       y = "Latitude",
       fill = "Max Temperature (°C)",
       title= "Era5 Native Resolution (0.1° x 0.1°)")
# max_t_ori
Krigs = t2m_max_daily_Krig$X1
Krig_df <- as.data.frame(Krigs[[1]], xy = TRUE)
colnames(Krig_df)[3] <- "values"



max_t_krig = Krig_df %>% 
  filter(!is.na(values)) %>% 
  ggplot()+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= "white",
               color = "black")+
  geom_tile(aes(x, y, fill = values-273.15))+
  geom_sf(data = ny_shape1,
              # aes(x=long, y = lat, group = group),
               fill= NA,
               color = "black")+
  scale_fill_viridis(option="B", limits = c(5,18), breaks =seq(5, 18, by = 3))+
  # scale_color_viridis(option="B")+
  geom_point(data = wm_data2,
             shape = 21,
             color = "white",
             fill = "black",
             aes(longitude,latitude))+
  coord_sf(xlim = c(-80,-76.8), ylim = c(41.8,43.5))+
  labs(x = "Longitude",
       y = "Latitude",
       fill = "Max Temperature (°C)",
       title= "Era5 Kriged (0.02° x 0.02°)")

max_t_ori + max_t_krig +
  plot_layout(ncol = 2, guides = "collect") &
  theme_map()&
  theme(legend.position = "bottom",
        legend.text = element_text(size = 9))

# ggsave("figs/max_t.png",dpi=900, height = 8, width=14, bg = "white")

Relative humidity

How to calculate relative humidity here link and here link

\[RH = \frac{es(d2m)}{es(t2m)}*100\]

\(es()\) is Saturation vapor pressure and can be calculated as

\[es(T) = \alpha_1*exp( \alpha_3*(\frac{T-t0}{T-\alpha_4}))\],

where \(T\) is the temperature, \(\alpha_1 = 611.21\), \(\alpha_2 = 17.502\), \(\alpha_3 = 32.19\).

Using these resources, I build the function to calculates RH from Temperature and dew point.

es =function(Temp){
t0 = 273.16
  
alpha1 = 611.21
alpha3 = 17.502
alpha4 = 32.19

alpha1*exp( alpha3*((Temp-t0)/(Temp-alpha4)))

}

#test
Tdp = 14+273.15
T2m = 31+273.15
es(Tdp)/es(T2m)*100
[1] 35.55801

Creating the raster object of relative humidity

layer_i = 1:nlayers(d2m_mean_daily_Krig)

calculate_RH = function(i, d2m_stack, t2m_stack){
  es(d2m_stack[[i]])/es(t2m_stack[[i]])*100
}

list_rh = lapply(layer_i, calculate_RH, d2m_stack = d2m_mean_daily_Krig,t2m_stack= t2m_mean_daily_Krig)

rh_mean_daily_Krig = brick(list_rh)



writeCDF(rast(rh_mean_daily_Krig),
         "data_era5/kriged/rh_mean_daily_Krig.nc",
         overwrite=TRUE)
rh_mean_daily_Krig = stack("data_era5/kriged/rh_mean_daily_Krig.nc")
plot(rh_mean_daily_Krig$X1, main = "Relative Humidity")

Extracting data

wm_data2_uni = wm_data2 %>% 
  group_by(subject) %>% 
  slice(1L)
coords<-data.frame(lon=wm_data2_uni$longitude, lat=wm_data2_uni$latitude)
coordinates(coords)<-c("lon","lat")
  • Dew point
ext_d2m = extract(d2m_mean_daily_Krig, coords)
colnames(ext_d2m) = as.character(data_era5$date)
ext_d2m_coord = cbind(longitude=wm_data2_uni$longitude, 
                      latitude=wm_data2_uni$latitude,
                      ext_d2m )

d2m_mean_wm = as.data.frame(ext_d2m_coord) %>%
  mutate(subject =wm_data2_uni$subject) %>% 
  pivot_longer(3:644,
               values_to = "d2m",
               names_to = "date")%>% 
  mutate(date = as.Date(date))
d2m_mean_wm
# A tibble: 241,392 × 5
   longitude latitude subject date         d2m
       <dbl>    <dbl>   <int> <date>     <dbl>
 1     -77.9     42.9       1 2006-04-01  282.
 2     -77.9     42.9       1 2006-04-02  271.
 3     -77.9     42.9       1 2006-04-03  276.
 4     -77.9     42.9       1 2006-04-04  270.
 5     -77.9     42.9       1 2006-04-05  266.
 6     -77.9     42.9       1 2006-04-06  272.
 7     -77.9     42.9       1 2006-04-07  275.
 8     -77.9     42.9       1 2006-04-08  268.
 9     -77.9     42.9       1 2006-04-09  268.
10     -77.9     42.9       1 2006-04-10  270.
# ℹ 241,382 more rows
  • Mean Temperature
ext_t2m_mean = extract(t2m_mean_daily_Krig, coords)
colnames(ext_t2m_mean) = as.character(data_era5$date)
ext_t2m_mean_coord = cbind(longitude=wm_data2_uni$longitude,
                           latitude=wm_data2_uni$latitude,
                           ext_t2m_mean)

t2m_mean_wm = as.data.frame(ext_t2m_mean_coord) %>%
  mutate(subject =wm_data2_uni$subject)%>%
  pivot_longer(3:644,
               values_to = "t2m_mean",
               names_to = "date")%>% 
  mutate(date = as.Date(date))
  • Max Temperature
ext_t2m_max = extract(t2m_max_daily_Krig, coords)
colnames(ext_t2m_max) = as.character(data_era5$date)
ext_t2m_max_coord = cbind(longitude=wm_data2_uni$longitude,
                           latitude=wm_data2_uni$latitude,
                           ext_t2m_max)

t2m_max_wm = as.data.frame(ext_t2m_max_coord) %>%
  mutate(subject =wm_data2_uni$subject)%>%
  pivot_longer(3:644,
               values_to = "t2m_max",
               names_to = "date")%>% 
  mutate(date = as.Date(date))
  • Min Temperature
ext_t2m_min = extract(t2m_min_daily_Krig, coords)
colnames(ext_t2m_min) = as.character(data_era5$date)
ext_t2m_min_coord = cbind(longitude=wm_data2_uni$longitude,
                           latitude=wm_data2_uni$latitude,
                           ext_t2m_min)

t2m_min_wm = as.data.frame(ext_t2m_min_coord) %>%
  mutate(subject =wm_data2_uni$subject)%>%
  pivot_longer(3:644,
               values_to = "t2m_min",
               names_to = "date")%>% 
  mutate(date = as.Date(date))
  • Pressure
ext_sp = extract(sp_mean_daily_Krig, coords)
colnames(ext_sp) = as.character(data_era5$date)
ext_sp_coord = cbind(longitude=wm_data2_uni$longitude, 
                      latitude=wm_data2_uni$latitude,
                      ext_sp )

sp_mean_wm = as.data.frame(ext_sp_coord) %>%
  mutate(subject =wm_data2_uni$subject)%>%
  pivot_longer(3:644,
               values_to = "sp",
               names_to = "date")%>% 
  mutate(date = as.Date(date))
  • Soil moisture
ext_sm = extract(swvl1_mean_daily_Krig, coords)
colnames(ext_sm) = as.character(data_era5$date)
ext_sm_coord = cbind(longitude=wm_data2_uni$longitude, 
                      latitude=wm_data2_uni$latitude,
                      ext_sm )

sm_mean_wm = as.data.frame(ext_sm_coord)%>%
  mutate(subject =wm_data2_uni$subject) %>%
  pivot_longer(3:644,
               values_to = "sm",
               names_to = "date")%>% 
  mutate(date = as.Date(date))
  • Relative humidity
ext_rh = extract(rh_mean_daily_Krig, coords)
colnames(ext_rh) = as.character(data_era5$date)
ext_rh_coord = cbind(longitude=wm_data2_uni$longitude,
                     latitude=wm_data2_uni$latitude,
                     ext_rh )

rh_mean_wm = as.data.frame(ext_rh_coord)%>%
  mutate(subject =wm_data2_uni$subject)%>%
  pivot_longer(3:644,
               values_to = "rh",
               names_to = "date") %>% 
  mutate(date = as.Date(date))
  • Soil temperature
ext_st = extract(stl1_mean_daily_Krig, coords)
colnames(ext_st) = as.character(data_era5$date)
ext_st_coord = cbind(longitude=wm_data2_uni$longitude,
                     latitude=wm_data2_uni$latitude,
                     ext_st )

st_mean_wm = as.data.frame(ext_st_coord) %>% 
  mutate(subject =wm_data2_uni$subject)%>%
  pivot_longer(3:644,
               values_to = "st",
               names_to = "date") %>% 
  mutate(date = as.Date(date))

Binding data sets

weather_all_wm = d2m_mean_wm %>%
  bind_cols(t2m_mean_wm[,5],
            t2m_max_wm[,5],
            t2m_min_wm[,5],
            sp_mean_wm[,5],
            sm_mean_wm[,5],
            rh_mean_wm[,5],
            st_mean_wm[,5])
weather_all_wm
# A tibble: 241,392 × 12
   longitude latitude subject date         d2m t2m_mean t2m_max t2m_min     sp
       <dbl>    <dbl>   <int> <date>     <dbl>    <dbl>   <dbl>   <dbl>  <dbl>
 1     -77.9     42.9       1 2006-04-01  282.     284.    289.    281. 97567.
 2     -77.9     42.9       1 2006-04-02  271.     279.    285.    273. 98578.
 3     -77.9     42.9       1 2006-04-03  276.     282.    290.    277. 97387.
 4     -77.9     42.9       1 2006-04-04  270.     277.    284.    273. 97250.
 5     -77.9     42.9       1 2006-04-05  266.     273.    277.    270. 97416.
 6     -77.9     42.9       1 2006-04-06  272.     276.    283.    273. 97868.
 7     -77.9     42.9       1 2006-04-07  275.     278.    281.    275. 97193.
 8     -77.9     42.9       1 2006-04-08  268.     274.    277.    271. 97920.
 9     -77.9     42.9       1 2006-04-09  268.     275.    282.    270. 98859.
10     -77.9     42.9       1 2006-04-10  270.     278.    288.    272. 99093.
# ℹ 241,382 more rows
# ℹ 3 more variables: sm <dbl>, rh <dbl>, st <dbl>

Saving data

data_full = weather_all_wm %>%
  dplyr::select(-latitude, -longitude) %>% 
 full_join(wm_data2, by = "subject") 
  
write.csv(data_full, "data_white-mold/data_model_plus_weather.csv",row.names = F)
data_full = read.csv("data_white-mold/data_model_plus_weather.csv") %>% 
  mutate(date = as.Date(date),
         sampling.date =  as.Date(sampling.date),
         planting.date = as.Date(planting.date))

filter between planting and sampling dates

data_fill_filtered = data_full %>% 
  filter(date >= (planting.date-30) & date <= sampling.date)
write.csv(data_fill_filtered, "data_white-mold/data_model_plus_weather_filtered.csv",row.names = F)
data_fill_filtered = read.csv("data_white-mold/data_model_plus_weather_filtered.csv")

Session Info

sessionInfo()
R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=Portuguese_Brazil.utf8  LC_CTYPE=Portuguese_Brazil.utf8   
[3] LC_MONETARY=Portuguese_Brazil.utf8 LC_NUMERIC=C                      
[5] LC_TIME=Portuguese_Brazil.utf8    

time zone: America/Sao_Paulo
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
 [1] ncdf4_1.23             MetBrewer_0.2.0        terra_1.8-5           
 [4] ggthemes_5.1.0         viridis_0.6.5          viridisLite_0.4.2     
 [7] sf_1.0-19              KrigR_0.9.4            spatstat_3.3-0        
[10] spatstat.linnet_3.2-3  spatstat.model_3.3-3   rpart_4.1.23          
[13] spatstat.explore_3.3-3 nlme_3.1-164           spatstat.random_3.3-2 
[16] spatstat.geom_3.3-4    spatstat.univar_3.1-1  spatstat.data_3.1-4   
[19] raster_3.6-30          sp_2.1-4               patchwork_1.3.0       
[22] cowplot_1.1.3          lubridate_1.9.3        forcats_1.0.0         
[25] stringr_1.5.1          dplyr_1.1.4            purrr_1.0.2           
[28] readr_2.1.5            tidyr_1.3.1            tibble_3.2.1          
[31] ggplot2_3.5.1          tidyverse_2.0.0       

loaded via a namespace (and not attached):
 [1] DBI_1.2.3             pbapply_1.7-2         deldir_2.0-4         
 [4] gridExtra_2.3         s2_1.1.7              rlang_1.1.4          
 [7] magrittr_2.0.3        e1071_1.7-16          compiler_4.4.1       
[10] mgcv_1.9-1            systemfonts_1.1.0     maps_3.4.2.1         
[13] vctrs_0.6.5           ecmwfr_2.0.3          wk_0.9.4             
[16] crayon_1.5.3          pkgconfig_2.0.3       fastmap_1.2.0        
[19] labeling_0.4.3        utf8_1.2.4            rmarkdown_2.28       
[22] tzdb_0.4.0            ragg_1.3.3            automap_1.1-12       
[25] xfun_0.48             cachem_1.1.0          jsonlite_1.8.9       
[28] progress_1.2.3        goftest_1.2-3         reshape_0.8.9        
[31] spatstat.utils_3.1-1  prettyunits_1.2.0     parallel_4.4.1       
[34] R6_2.5.1              stringi_1.8.4         stars_0.6-7          
[37] Rcpp_1.0.13           iterators_1.0.14      knitr_1.48           
[40] tensor_1.5            zoo_1.8-12            snow_0.4-4           
[43] FNN_1.1.4.1           Matrix_1.7-0          splines_4.4.1        
[46] timechange_0.3.0      tidyselect_1.2.1      rstudioapi_0.17.0    
[49] abind_1.4-8           yaml_2.3.10           codetools_0.2-20     
[52] lattice_0.22-6        intervals_0.15.5      plyr_1.8.9           
[55] withr_3.0.2           evaluate_1.0.1        units_0.8-5          
[58] proxy_0.4-27          polyclip_1.10-7       xts_0.14.1           
[61] pillar_1.9.0          KernSmooth_2.23-24    renv_1.1.2           
[64] foreach_1.5.2         generics_0.1.3        spacetime_1.3-2      
[67] hms_1.1.3             munsell_0.5.1         scales_1.3.0         
[70] class_7.3-22          glue_1.8.0            mapproj_1.2.11       
[73] tools_4.4.1           grid_4.4.1            colorspace_2.1-1     
[76] cli_3.6.3             spatstat.sparse_3.1-0 gstat_2.1-2          
[79] textshaping_0.4.0     fansi_1.0.6           doSNOW_1.0.20        
[82] gtable_0.3.5          digest_0.6.37         classInt_0.4-10      
[85] htmlwidgets_1.6.4     farver_2.1.2          memoise_2.0.1        
[88] htmltools_0.5.8.1     lifecycle_1.0.4       httr_1.4.7